home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Skunkware 5
/
Skunkware 5.iso
/
src
/
Tools
/
glimpsehttp
/
cgi-bin
/
news
< prev
Wrap
Text File
|
1995-05-16
|
6KB
|
229 lines
#!/usr/local/bin/perl
#
# Acknowledgements
#
# Thanks to Guy Brooker (guy@jw.estec.esa.nl) for his AA interface,
# which was the starting point for this program.
#
# Paul Clark
# paul@cs.arizona.edu
#
# Modifications
#
# 2/22/94 Version 1.0, shell script version Paul Clark
# 4/21/94 Version 1.1, multiple archives support Paul Clark
# 4/22/94 Version 1.2, perl script Paul Clark
# 4/26/94 rewritten from aglimpse script
# **** **** **** **** CONFIGURABLE VARIABLES **** **** **** ****
# Site-specific paths; adjust these for the local installation.
$HTTPD_HOME     = '/usr1/paul/httpd';      # not referenced elsewhere in this script
$HTTPD_NEWSHOME = '/usr1/paul/news';       # indices are looked up in "$HTTPD_NEWSHOME/indices/<newsgroup>"
$GLIMPSE_LOC    = '/usr/paul/bin/glimpse'; # glimpse executable; must pass the -x test below
# **** **** **** **** NO CONFIGURATION NEEDED BELOW **** **** **** ****
# URL prefix placed in front of the article path in every result link.
$FSSERV = '/cgi-bin/article';
# ---- Request parsing ------------------------------------------------------
# Builds one "name=value&name=value" string from up to three sources and
# stores every pair in a package variable named $QS_<name>:
#   * command-line arguments (ISINDEX style search): default options are
#     prepended so that the bare query string becomes the value of "query";
#   * PATH_INFO (old-fashioned way to give the newsgroup): becomes "group";
#   * QUERY_STRING: the pairs sent by a form.
# Values are stored exactly as received (still URL-encoded); decoding of
# $QS_query happens later in the script.

# To support an ISINDEX type search, set query string if given
# an argument on the command line
if ( $#ARGV >= 0 ) {
    $prefix = "whole=on&case=on&query=";
}
if ( $ENV{'PATH_INFO'} ) {
    # old-fashioned way to give newsgroup
    $newsgroup = substr($ENV{'PATH_INFO'}, 1);
    $newsgroup =~ s|"||g;
    $prefix = "group=" . $newsgroup . "&" . $prefix;
}
# Check that a query has been made
$query = $ENV{'QUERY_STRING'};
$query =~ s|"||g;

# Strip the variables out from the query string,
# and assign them into variables, prefixed by 'QS_'
set_query_vars($prefix . $query);

# set_query_vars(STRING)
#   Splits STRING on "&" into name=value pairs and assigns each value to the
#   package variable $QS_<name>.  Pairs whose name is not a plain identifier
#   (/^[a-z_A-Z]\w*\z/) are ignored.  A missing value is stored as "".
#   Returns the accepted pairs as a flat name => value list.
#
#   The value is assigned directly instead of being pasted into Perl source
#   and run through string eval: request data must never be evaluated as
#   code, and a value containing "$", "@" or "\" must stay literal.
sub set_query_vars {
    my ($raw) = @_;
    my %accepted;
    $raw = '' unless defined $raw;
    no strict 'refs';    # $QS_<name> is addressed by name on purpose
    foreach my $pair (split(/\&/, $raw)) {
        # Explicit list assignment: split no longer fills @_ implicitly on
        # current Perl.  The limit keeps any "=" inside the value.
        my ($fname, $fvalue) = split(/=/, $pair, 2);
        next unless defined $fname && $fname =~ /^[a-z_A-Z]\w*\z/;
        $fvalue = '' unless defined $fvalue;
        ${"QS_$fname"} = $fvalue;
        $accepted{$fname} = $fvalue;
    }
    return %accepted;
}
# ---- Request validation -----------------------------------------------------
# Turns the parsed $QS_* request variables into the values the search needs:
#   $newsgroup, $indexdir      which index to search
#   $QS_query                  the decoded search pattern
#   $OPT_errors/_case/_whole   glimpse command-line switches
#   $maxlines, $maxfiles       output limits (defaults 20 and 100)
# Any failed precondition is reported through an err_* page, which exits.

$newsgroup = $QS_group;
$newsgroup =~ tr/A-Z/a-z/;
# The name becomes a directory component below, so accept only dot-separated
# words made of letters, digits, "+", "_" and "-".  This rejects "..", "/"
# and markup.  An empty name is passed through unchanged, as before.
if ($newsgroup ne '' &&
    $newsgroup !~ /^[a-z0-9][a-z0-9+_-]*(\.[a-z0-9+_-]+)*\z/) {
    # Keep only harmless characters so the error page can show the name.
    $newsgroup =~ s/[^a-z0-9.+_-]//g;
    &err_noindex;
}
$indexdir = $HTTPD_NEWSHOME . "/indices/" . $newsgroup;
$ENV{'HOME'} = $indexdir; # some versions of Glimpse need it

# Ensure that Glimpse is available on this machine
-x $GLIMPSE_LOC || &err_noglimpse ;
# Ensure that index is available
-r "$indexdir/.glimpse_index" || &err_noindex ;

# URL-decode the query: "+" is a space, "%XX" is a byte given in hex.
$QS_query =~ s|\+| |g;
# Only real hex pairs are decoded; something like "%zz" is left as typed.
$QS_query =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
$QS_query =~ tr/\0//d;    # a NUL byte cannot be part of a command argument
$QS_query =~ s|"||g;

#if no query has been made, provide ISINDEX type of reply
# (compare against the empty string so that a search for "0" is a query)
(defined $QS_query && $QS_query ne '') || &err_noquery ;

# Translate form fields into glimpse switches.
$OPT_errors = "-$QS_errors" if $QS_errors =~ /^[0-8]\z/;
$OPT_errors = "-B -y"       if $QS_errors =~ /^Best\+match\z/;
$OPT_case   = "-i"          if $QS_case =~ /^on\z/;
$OPT_whole  = "-w"          unless $QS_whole =~ /^on\z/;

# Output limits: the first run of digits in the field, else the default.
$maxlines = ($QS_maxlines =~ /(\d+)/) ? $1 : 20;
$maxfiles = ($QS_maxfiles =~ /(\d+)/) ? $1 : 100;
# ---- Run the search and render the result page ------------------------------
# Uses values prepared earlier in the script: $QS_query, $newsgroup,
# $indexdir, $GLIMPSE_LOC, $FSSERV, $OPT_case, $OPT_whole, $OPT_errors,
# $maxlines and $maxfiles.  Glimpse output lines of the form
# "file: lineno: text" are grouped per article and printed as nested lists.

# HTML-safe copies of the request-derived strings shown on the page.
my $query_html = $QS_query;
my $group_html = $newsgroup;
foreach ($query_html, $group_html) {
    $_ = '' unless defined $_;
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;
    s/"/&quot;/g;
}

print "Content-type: text/html\n\n" ;
print "<HEAD><TITLE>Search for \"$query_html\" in \"$group_html\"\n";
print "</TITLE></HEAD><BODY>\n";
print "<H1>Glimpse search for \"$query_html\"<BR>".
      "in newsgroup \"$group_html\"</H1><HR>\n";

# Only clean up temporary files once we are really inside the index
# directory; otherwise the glob would hit whatever directory we are in.
unless (chdir $indexdir) {
    print "<H3>Cannot access the index directory.</H3>\n</BODY>\n";
    exit 1;
}
unlink <.glimpse_tmp*> ;

# Build the argument list.  The query is handed to glimpse as one argument
# and never passes through a shell, so "`", "$(...)" and "\" in it are just
# pattern characters.
# NOTE(review): a pattern starting with "-" may still be read by glimpse as
# an option - confirm against the glimpse manual.
my @glimpse_args = (
    '-z', '-y', '-n',
    split(' ', join(' ', $OPT_case, $OPT_whole, $OPT_errors)),
    '-H', '.',
    split(' ', defined $OPT_filter ? $OPT_filter : ''),
    $QS_query,
);

# Flush what has been printed so far so the child cannot inherit (and later
# re-emit) buffered page output.
$| = 1;

# Fork with the child's STDOUT connected to GOUT, then exec glimpse directly.
$gpid = open(GOUT, "-|");
unless (defined $gpid) {
    print "<H3>Unable to start the search program.</H3>\n</BODY>\n";
    exit 1;
}
if ($gpid == 0) {
    # Child: merge stderr into the pipe (what "2>&1" did) and become glimpse.
    open(STDERR, ">&STDOUT");
    exec($GLIMPSE_LOC, @glimpse_args);
    exit 127;    # only reached when exec itself failed
}

$prevfile = "";
$lcount = 0;
$fcount = 0;
my $list_open = 0;    # true while an article's <UL> has not been closed

line: while (<GOUT>) {
    # Escape the raw glimpse output before any of it is printed as HTML.
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;
    s/"/&quot;/g;

    # Anything that is not "file: lineno: text" (e.g. diagnostics) is skipped.
    ( /^([^ :]*):\s*([0-9][0-9]*):(.*)/ ) || next;
    $file = $1;
    $line = $2;
    $string = $3;
    # Reduce the path to the trailing "/<dir>/<name>" below "groups".
    $file =~ s|.*groups(/[^/]*/[^/]*)$|$1|;

    if ($file ne $prevfile) {
        # First match in a new article.
        $fcount++ ;
        $linecount = -1;
        if ($fcount > $maxfiles) {
            # Close the previous article's list before the notice so the
            # page stays well formed.
            if ($list_open) {
                print "</UL>\n";
                $list_open = 0;
            }
            print "<H3>Limit of $maxfiles " .
                  "articles exceeded...</H3>\n";
            $file = "";
            $fcount = "at least $fcount";
            $lcount = "at least $lcount";
            last line;
        }
        print "</UL>" if $list_open;
        $prevfile = $file ;
        print "<H3>Article <A HREF=\"",$FSSERV,$file,
              "\">",$file,"</A></H3><UL>\n" ;
        $list_open = 1;
    }

    $lcount++ ;
    $linecount++;
    if ($linecount >= $maxlines) {
        # Print the notice once, then silently count the remaining matches.
        print "<LI>Limit of $maxlines matched " .
              "lines per file exceeded...\n" if
              $linecount == $maxlines;
        next line;
    }
    print "<LI><A HREF=\"",$FSSERV,$file,"?",$line,"#mfs\">\n" ;
    print "line ",$line,":",$string,"</A>\n" ;
}
print "</UL>\n" if $list_open ;
print "<HR>" ;
print "<H3>Summary for query <code>\"",$query_html,"\"</code>:</H3>\n" ;
print "found ",$lcount," matches in ",$fcount," articles\n" ;
print "</BODY>\n" ;
close(GOUT);
# Remove the temporary file named after the glimpse process id.
unlink "/tmp/.glimpse_tmp.$gpid";
sub diag_exit {
    # Common exit point of the err_* pages: end the script with status 1.
    exit(1);
}
sub err_noquery {
    # The script was called without a query.
    # Provide an ISINDEX type response for browsers
    # without form support.
    #
    # Reads the global $newsgroup, prints a complete HTML page (including
    # the Content-type header) and ends the script through diag_exit.

    # $newsgroup comes from the request, so escape it before it is printed
    # into the page.
    my $group = defined $newsgroup ? $newsgroup : '';
    $group =~ s/&/&amp;/g;
    $group =~ s/</&lt;/g;
    $group =~ s/>/&gt;/g;
    $group =~ s/"/&quot;/g;

    print "Content-type: text/html\n\n";
    print "<HEAD><TITLE>Search newsgroup \"$group\"</TITLE></HEAD>\n";
    print "<BODY><H1> Search newsgroup \"$group\" </H1>\n";
    print "Welcome to the gateway to \"$group\".\n";
    print <<'EOM' ;
Type a pattern to search in your browser's
search dialog. Query is case-insensitive by default<P>
<ISINDEX>
<H2>IMPORTANT !</H2>
<QUOTE>
<UL>
<LI>This is an experimental service. Therefore, its functionality
may change or discontinue at any moment without prior notice.
<LI>Please report any malfunctions of this gateway to the
address below.
<LI>All access operations on the server are logged.
If you discover any security leaks, don't use them - report them to
the address below.
</UL>
</QUOTE>
<HR>
<ADDRESS>
Your name<BR>
login@your.host.here<BR>
</ADDRESS>
</BODY>
EOM
    &diag_exit;
}
sub err_noglimpse {
    #
    # Glimpse was not found
    # Report a useful message
    #
    # Prints a complete HTML page and ends the script through diag_exit.
    # The empty line after the Content-type header is required: it ends the
    # CGI header section.  Without it the page markup would be read as
    # header lines.
    print <<'EOM' ;
Content-type: text/html

<HEAD>
<TITLE>Glimpse not found</TITLE>
</HEAD>
<BODY>
<H1>Glimpse not found</H1>
This gateway relies on <CODE>Glimpse</CODE> search tool.
If it is installed, please set the correct path in the script file.
Otherwise obtain the latest version from
<A HREF="file://ftp.cs.arizona.edu/glimpse">ftp.cs.arizona.edu</A>
</BODY>
EOM
    &diag_exit;
}
sub err_noindex {
    # No readable index exists for the requested newsgroup.
    # Reads the global $newsgroup, prints a complete HTML page (including
    # the Content-type header) and ends the script through diag_exit.

    # The name is whatever the client asked for, so escape it before it is
    # echoed back; otherwise markup in the request would be rendered.
    my $group = defined $newsgroup ? $newsgroup : '';
    $group =~ s/&/&amp;/g;
    $group =~ s/</&lt;/g;
    $group =~ s/>/&gt;/g;
    $group =~ s/"/&quot;/g;

    print "Content-type: text/html\n\n";
    print "<HEAD><TITLE>Newsgroup $group not found</TITLE>\n";
    print "</HEAD> <BODY>\n";
    print "<H1>Newsgroup $group not found</H1>\n";
    print <<'EOM' ;
Please remember: this is NOT a news server. It has only a few
newsgroups. Please use it ONLY for the search of a particular article,
NOT for reading the news.
</BODY>
EOM
    &diag_exit;
}